A. Number

1) Frequency of tickets in the five boroughs (Manhattan, Brooklyn (Kings County), the Bronx, Staten Island, Queens)

# Load the parking/camera violation records and normalise them for plotting:
#   * `county` is renamed to `borough` and its abbreviations are collapsed
#     into one label per borough (any code not listed becomes NA),
#   * `issue_date` is parsed into a Date, from which weekday / year / month /
#     day columns are derived,
#   * rows without a recognised borough or a parseable date are dropped,
#     as are rows issued in November or December.
violation <- read_csv("./Open_Parking_and_Camera_Violations.csv") %>%
  janitor::clean_names() %>%
  rename(borough = county) %>%
  mutate(
    borough = case_when(
      borough %in% c("BK", "K", "Kings") ~ "Brooklyn",
      borough %in% c("BX", "Bronx") ~ "Bronx",
      borough %in% c("Q", "QN", "Qns") ~ "Queens",
      borough %in% c("ST", "R", "Rich", "RICH") ~ "Staten Island",
      borough %in% c("NY", "MN") ~ "Manhattan"
    ),
    # "%Y" reads the full four-digit year. With "%y" only the first two
    # digits are consumed, so "06/15/2021" would be parsed as 2020-06-15 and
    # the derived weekday/year would be wrong.
    # NOTE(review): assumes dates are stored as MM/DD/YYYY -- confirm.
    issue_date = as.Date(issue_date, format = "%m/%d/%Y"),
    weekday = weekdays(issue_date),
    year = year(issue_date),
    month = month(issue_date),
    day = day(issue_date)
  ) %>%
  filter(
    # Unmapped county codes (e.g. "A") fall through case_when() as NA.
    !is.na(borough),
    # An unparseable issue_date yields an NA weekday.
    !is.na(weekday),
    # NOTE(review): Nov/Dec are excluded, presumably because the export is
    # incomplete for those months -- confirm.
    !month %in% c(11, 12)
  )
## Rows: 1770806 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): Plate, State, License Type, Issue Date, Violation Time, Violation,...
## dbl  (7): Summons Number, Fine Amount, Penalty Amount, Interest Amount, Redu...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Other potential cleaning steps: 1) select only the variables we need; 2) convert `month` to character?

# Interactive bar chart: total number of tickets per borough, with the bars
# ordered from the fewest to the most tickets.
violation %>%
  count(borough) %>%
  # Reorder the factor levels by ticket count so plotly draws ascending bars.
  mutate(borough = fct_reorder(borough, n)) %>%
  plot_ly(
    x = ~borough,
    y = ~n,
    color = ~borough,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    title = "Frequency of Tickets in Boroughs in 2021",
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Number of Tickets")
  )
# Interactive bar chart: number of tickets per month, one coloured bar series
# per borough.
violation %>%
  group_by(borough) %>%
  count(month) %>%
  mutate(
    # Turn the month number into its abbreviation and fix the level order to
    # calendar order (Jan..Oct). Building the factor with explicit levels
    # keeps the ordering identical for every borough, even if a borough has
    # no tickets in some month.
    month = factor(month.abb[month], levels = month.abb[1:10])
  ) %>%
  plot_ly(
    x = ~month,
    y = ~n,
    color = ~borough,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    title = "Frequency of Tickets in Boroughs in 2021",
    # The x axis shows months, not boroughs (boroughs are the colours).
    xaxis = list(title = "Month"),
    yaxis = list(title = "Number of Tickets in Each Month in 2021")
  )

Frequency of violation type in each borough.

# Faceted bar chart of the 10 most frequent violation types in each borough,
# stored as a ggplot object and then rendered interactively with ggplotly().
violation_type <- violation %>%
  group_by(borough) %>%
  count(violation) %>%
  mutate(violation = fct_reorder(violation, n)) %>%
  arrange(desc(n)) %>%
  # The data is still grouped by borough here, so row_number() ranks the
  # violation types within each borough after the descending sort.
  mutate(index = row_number()) %>%
  filter(index <= 10) %>%
  ggplot(aes(x = violation, y = n, fill = violation)) +
  geom_col() +
  facet_grid(. ~ borough) +
  # labs() names axis labels by aesthetic (`x`, `y`); `xlab`/`ylab` are not
  # aesthetics, so passing them left the axis titles at their defaults.
  labs(
    title = "Frequency of Violation Type in Each Borough",
    x = "Violation Type",
    y = "Number of tickets"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1),
    legend.text = element_text(size = 8)
  )

ggplotly(violation_type)